BLK-OBJS += block-ram.o
BLK-OBJS += block-qcow.o
BLK-OBJS += aes.o
+BLK-OBJS += tapaio.o
all: $(IBIN) qcow-util
#include <sys/ioctl.h>
#include <linux/fs.h>
#include "tapdisk.h"
-
-
-/**
- * We used a kernel patch to return an fd associated with the AIO context
- * so that we can concurrently poll on synchronous and async descriptors.
- * This is signalled by passing 1 as the io context to io_setup.
- */
-#define REQUEST_ASYNC_FD 1
+#include "tapaio.h"
#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
int fd;
/* libaio state */
- io_context_t aio_ctx;
+ tap_aio_context_t aio_ctx;
struct iocb iocb_list [MAX_AIO_REQS];
struct iocb *iocb_free [MAX_AIO_REQS];
struct pending_aio pending_aio[MAX_AIO_REQS];
int iocb_free_count;
struct iocb *iocb_queue[MAX_AIO_REQS];
int iocb_queued;
- int poll_fd; /* NB: we require aio_poll support */
struct io_event aio_events[MAX_AIO_REQS];
};
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = prv->poll_fd;
+ dd->io_fd[0] = prv->aio_ctx.pollfd;
}
/* Open the disk file and initialize aio state. */
/* Initialize AIO */
prv->iocb_free_count = MAX_AIO_REQS;
prv->iocb_queued = 0;
-
- prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
- prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
- if (prv->poll_fd < 0) {
- ret = prv->poll_fd;
+ ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
+ if (ret < 0) {
if (ret == -EAGAIN) {
DPRINTF("Couldn't setup AIO context. If you are "
"trying to concurrently use a large number "
"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
"aio-max-nr')\n");
} else {
- DPRINTF("Couldn't get fd for AIO poll support. This "
- "is probably because your kernel does not "
- "have the aio-poll patch applied.\n");
+ DPRINTF("Couldn't setup AIO context.\n");
}
goto done;
}
if (!prv->iocb_queued)
return 0;
- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
/* XXX: TODO: Handle error conditions here. */
{
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- io_destroy(prv->aio_ctx);
+ io_destroy(prv->aio_ctx.aio_ctx);
close(prv->fd);
return 0;
int tdaio_do_callbacks(struct disk_driver *dd, int sid)
{
- int ret, i, rsp = 0;
+ int i, nr_events, rsp = 0;
struct io_event *ep;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- /* Non-blocking test for completed io. */
- ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
- NULL);
-
- for (ep=prv->aio_events,i=ret; i-->0; ep++) {
+ nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+ for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
prv->iocb_free[prv->iocb_free_count++] = io;
}
+
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&prv->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&prv->aio_ctx);
+
return rsp;
}
#include "bswap.h"
#include "aes.h"
#include "tapdisk.h"
+#include "tapaio.h"
#if 1
#define ASSERT(_p) \
(l + (s - 1)) - ((l + (s - 1)) % s)); \
})
-/******AIO DEFINES******/
-#define REQUEST_ASYNC_FD 1
-
struct pending_aio {
td_callback_t cb;
int id;
AES_KEY aes_encrypt_key; /*AES key*/
AES_KEY aes_decrypt_key; /*AES key*/
/* libaio state */
- io_context_t aio_ctx;
+ tap_aio_context_t aio_ctx;
int max_aio_reqs;
struct iocb *iocb_list;
struct iocb **iocb_free;
int iocb_free_count;
struct iocb **iocb_queue;
int iocb_queued;
- int poll_fd; /* NB: we require aio_poll support */
struct io_event *aio_events;
};
static int init_aio_state(struct disk_driver *dd)
{
- int i;
+ int i, ret;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
long ioidx;
goto fail;
}
- /*Signal kernel to create Poll FD for Asyc completion events*/
- s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
- s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
-
- if (s->poll_fd < 0) {
- if (s->poll_fd == -EAGAIN) {
+ ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
DPRINTF("Couldn't setup AIO context. If you are "
"trying to concurrently use a large number "
"of blktap-based disks, you may need to "
"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
"aio-max-nr')\n");
} else {
- DPRINTF("Couldn't get fd for AIO poll support. This "
- "is probably because your kernel does not "
- "have the aio-poll patch applied.\n");
+ DPRINTF("Couldn't setup AIO context.\n");
}
goto fail;
}
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = s->poll_fd;
+ dd->io_fd[0] = s->aio_ctx.pollfd;
}
/* Open the disk file and initialize qcow state. */
if (!prv->iocb_queued)
return 0;
- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
/* XXX: TODO: Handle error conditions here. */
close(fd);
}
- io_destroy(s->aio_ctx);
+ io_destroy(s->aio_ctx.aio_ctx);
free(s->name);
free(s->l1_table);
free(s->l2_cache);
int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
{
- int ret, i, rsp = 0,*ptr;
+ int ret, i, nr_events, rsp = 0,*ptr;
struct io_event *ep;
struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
if (sid > MAX_IOFD) return 1;
-
- /* Non-blocking test for completed io. */
- ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
- NULL);
- for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+ nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+ for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
prv->iocb_free[prv->iocb_free_count++] = io;
}
+
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&prv->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&prv->aio_ctx);
+
return rsp;
}
--- /dev/null
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ *    via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ * received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ * buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+/*
+ * Worker thread used on kernels without the aio-poll patch.
+ *
+ * Loop: block on the command pipe until the main thread requests more
+ * events (tap_aio_continue), then block in io_getevents() and send the
+ * number of completed events back over the completion pipe.  Runs
+ * forever; arg is the tap_aio_context_t this thread serves.
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+        tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+        int command;
+        int nr_events;
+        int rc;
+
+        while (1) {
+                /* Wait for the main thread's go-ahead; the command
+                 * value itself is ignored.  NOTE(review): the read()
+                 * result is discarded -- a closed/failed command pipe
+                 * would make this loop spin. */
+                rc = read(ctx->command_fd[0], &command, sizeof(command));
+
+                do {
+                        /* Block until at least one aio event completes
+                         * (min_nr == 1, no timeout). */
+                        rc = io_getevents(ctx->aio_ctx, 1,
+                                ctx->max_aio_events, ctx->aio_events,
+                                NULL);
+                        if (rc) {
+                                /* Report the batch size; loop exits once
+                                 * the write succeeds (rc != 0). */
+                                nr_events = rc;
+                                rc = write(ctx->completion_fd[1], &nr_events,
+                                        sizeof(nr_events));
+                        }
+                } while (!rc);
+        }
+}
+
+/*
+ * Ask the completion thread to wait for the next batch of aio events.
+ * Must only be called after the previous batch in ctx->aio_events has
+ * been consumed, since the thread refills that same buffer.  No-op on
+ * the aio-poll kernel path (poll_in_thread == 0), where no helper
+ * thread exists.
+ */
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+        int cmd = 0;
+
+        if (!ctx->poll_in_thread)
+                return;
+
+        /* The value written is arbitrary; it is only a wakeup token. */
+        if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+                DPRINTF("Cannot write to command pipe\n");
+}
+
+/*
+ * Initialise a tap aio context over the caller-supplied event buffer.
+ *
+ * First probes for the REQUEST_ASYNC_FD kernel patch, under which
+ * io_setup() returns a pollable fd directly.  If the kernel rejects
+ * that (-EINVAL), falls back to a plain aio context plus a completion
+ * thread that forwards event counts through a pipe (see the comment
+ * above tap_aio_completion_thread).
+ *
+ * On success ctx->pollfd is ready for select/poll; returns 0 on the
+ * fallback path or the poll fd on the patched-kernel path.  On failure
+ * returns a negative errno from io_setup() or -1.
+ *
+ * NOTE(review): on the pipe()/pthread_create() failure paths the aio
+ * context and any already-created pipe fds are leaked -- acceptable
+ * only if callers treat setup failure as fatal; otherwise worth a
+ * cleanup pass.
+ */
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+              struct io_event *aio_events,
+              int max_aio_events)
+{
+        int ret;
+
+        ctx->aio_events = aio_events;
+        ctx->max_aio_events = max_aio_events;
+        ctx->poll_in_thread = 0;
+
+        /* Probe for the aio-poll patch: a patched io_setup() sees the
+         * magic context value and returns a poll fd (> 0). */
+        ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0 && ret != -EINVAL)
+                return ret;
+        else if (ret > 0) {
+                ctx->pollfd = ret;
+                return ctx->pollfd;
+        }
+
+        /* Unpatched kernel: set up an ordinary aio context ... */
+        ctx->aio_ctx = (io_context_t) 0;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0)
+                return ret;
+
+        /* ... and the command/completion pipes used to hand-shake with
+         * the completion thread. */
+        if ((ret = pipe(ctx->command_fd)) < 0) {
+                DPRINTF("Unable to create command pipe\n");
+                return -1;
+        }
+        if ((ret = pipe(ctx->completion_fd)) < 0) {
+                DPRINTF("Unable to create completion pipe\n");
+                return -1;
+        }
+
+        if ((ret = pthread_create(&ctx->aio_thread, NULL,
+                        tap_aio_completion_thread, ctx)) != 0) {
+                DPRINTF("Unable to create completion thread\n");
+                return -1;
+        }
+
+        /* Callers poll the read end of the completion pipe instead of a
+         * kernel-provided aio fd. */
+        ctx->pollfd = ctx->completion_fd[0];
+        ctx->poll_in_thread = 1;
+
+        /* Prime the thread so it starts waiting for events now. */
+        tap_aio_continue(ctx);
+
+        return 0;
+}
+
+/*
+ * Fetch the next batch of completed aio events into ctx->aio_events and
+ * return how many arrived.  Blocks until at least one is available:
+ * directly in io_getevents() on the aio-poll kernel path, or on the
+ * completion pipe fed by the helper thread on the fallback path.
+ * NOTE(review): the pipe read's return value is ignored; a failed read
+ * would leave nr_events at 0.
+ */
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+        int nr_events = 0;
+
+        if (!ctx->poll_in_thread)
+                nr_events = io_getevents(ctx->aio_ctx, 1,
+                        ctx->max_aio_events, ctx->aio_events, NULL);
+        else
+                read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+
+        return nr_events;
+}
+
+/*
+ * Non-blocking drain: fetch any further events that have already
+ * completed (min_nr == 0) after a tap_aio_get_events() batch.  Returns
+ * the number fetched, possibly 0.
+ */
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+        return io_getevents(ctx->aio_ctx, 0,
+                ctx->max_aio_events, ctx->aio_events, NULL);
+}
+
+
--- /dev/null
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __TAPAIO_H__
+#define __TAPAIO_H__
+
+#include <pthread.h>
+#include <libaio.h>
+
+/*
+ * State for polling aio completions alongside ordinary fds.  Filled in
+ * by tap_aio_setup(); callers select/poll on 'pollfd', then collect
+ * completions with tap_aio_get_events()/tap_aio_more_events() and
+ * re-arm with tap_aio_continue().
+ */
+struct tap_aio_context {
+        io_context_t aio_ctx;           /* kernel aio context */
+
+        struct io_event *aio_events;    /* caller-owned completion buffer */
+        int max_aio_events;             /* capacity of aio_events */
+
+        pthread_t aio_thread;           /* completion thread (fallback path) */
+        int command_fd[2];              /* main -> thread wakeup pipe */
+        int completion_fd[2];           /* thread -> main event-count pipe */
+        int pollfd;                     /* fd callers poll/select on */
+        unsigned int poll_in_thread : 1; /* 1 = thread/pipe fallback in use */
+};
+
+typedef struct tap_aio_context tap_aio_context_t;
+
+int  tap_aio_setup      (tap_aio_context_t *ctx,
+                         struct io_event *aio_events,
+                         int max_aio_events);
+void tap_aio_continue   (tap_aio_context_t *ctx);
+int  tap_aio_get_events (tap_aio_context_t *ctx);
+int  tap_aio_more_events(tap_aio_context_t *ctx);
+
+#endif /* __TAPAIO_H__ */